
do "E:/02_code/00_environment/00_set_environment.do"

********Gen teacher by year VA measures 
****Merge with master build file and test accomodations file
clear
use "$basedata/course_mem_clean_rd.dta", clear

 keep birthdt schlcode year lea statecourse section meetingcode grade mastid teachid numstudents
 ren (*) *_rd
 
 ren mastid_rd mastid
 ren year_rd year
 
 tempfile temprd
 save `temprd', replace

use "$basedata/course_mem_clean.dta", clear
 keep birthdt schlcode year lea statecourse section meetingcode grade mastid teachid numstudents
 ren (*) *_ma
 
 ren mastid_ma mastid
 ren year_ma year
 
 
 sort mastid year
 
 merge 1:1 mastid year using `temprd'
 drop _m
 
 gen birthdt = birthdt_ma
 replace birthdt = birthdt_rd if birthdt==.
 replace birthdt = . if birthdt_ma!=birthdt_rd & birthdt_ma!=. & birthdt_rd!=.
 drop birthdt_*
 
 gen grade = grade_ma
 replace grade = grade_rd if grade==.
 replace grade = . if grade_ma!=grade_rd & grade_ma!=. & grade_rd!=.
 drop grade_*
 
 
merge 1:1 mastid year using "$basedata/mb_clean.dta"
ren _merge mrg_mb

* keep only the standard tests
replace score_ma = . if !inlist(ma_test_id,"RG","MA03","MA04","MA05","MA06","MA07","MA08")
replace score_rd = . if !inlist(rd_test_id,"RG","RD03","RD04","RD05","RD06","RD07","RD08")

 merge 1:1 mastid year using "$basedata/test_accom.dta"
ren _merge mrg_accom
xtset mastid year
******Generate age variable
	 gen age =  (mdy(6,30,year)-birthdt)/365.25
 replace age=0 if age==.
 
 egen teach_year_ma=group(teachid_ma year)
 egen teach_year_rd=group(teachid_rd year)
 egen teach_schl_ma=group(teachid_ma schlcode_ma lea_ma )
 egen teach_schl_rd=group(teachid_rd schlcode_rd lea_rd)
gen l_grade=l.grade
* GEN lagged scores, lagged squares, and lagged cubes
	* CREATE INTERACTIONS OF PRIOR TEST SCORES WITH GRADE LEVEL
	foreach i in ma rd {
cap	gen l_`i'=l.score_`i'
cap	gen l_`i'_sq= l_`i'^2
cap	gen l_`i'_cb= l_`i'^3
cap	gen l_`i'_miss=(l_`i'==.)
 cap xi i.l_grade|l_`i' i.l_grade|l_`i'_sq i.l_grade|l_`i'_cb  , prefix(IL`i')
}
**Focus on grade 4-8
drop if grade<4
drop if grade>8
****Generate teacher-by-school-by-studenttype-VA, teacher-by-year-VA, teacher-by-school VA
gen lag_ma=l_ma
gen lag_rd=l_rd


egen teach_DISADV_yr_ma=group(teachid_ma DISADV year)
egen teach_DISADV_yr_rd=group(teachid_rd DISADV year)
egen teach_DISADV_schl_ma=group(teachid_ma DISADV schlcode_ma lea_ma)
egen teach_DISADV_schl_rd=group(teachid_rd DISADV schlcode_rd lea_rd)

egen section_size_ma=sum(1), by(year schlcode_ma lea_ma  section_ma teachid_ma)
egen section_size_rd=sum(1), by(year schlcode_rd lea_rd  section_rd teachid_rd)
****class demographics
gen white=(ethnic==5)
gen black=(ethnic==4)
gen hispanic=(ethnic==3)

foreach ss in ma rd {
foreach i in white black hispanic female disability DISADV ENG_LEARN {
	egen class_mn_`i'_`ss'=mean(`i'), by(year schlcode_`ss' lea_`ss'  section_`ss' teachid_`ss')
		egen schl_mn_`i'_`ss'=mean(`i'), by(year schlcode_`ss' lea_`ss')

}
}

****Generate indicators for data missingness
 foreach i in grade ethnic female gifted disability MIGRANT ENG_LEARN DISADV DISADV_persist year read_accom math_accom numstudents_ma numstudents_rd {
 cap	gen `i'_miss=(`i'==.)
	replace `i'=0 if `i'==.
 }
 replace numstudents_ma=section_size_ma if numstudents_ma==0
 replace numstudents_rd=section_size_rd if numstudents_rd==0
 
 egen sidx_ma = group(schlcode_ma lea_ma)
 egen sidx_rd = group(schlcode_rd lea_rd)


local student_cov2 i.grade i.ethnic i.female i.gifted i.disability i.MIGRANT i.ENG_LEARN  i.DISADV_persist read_accom math_accom  *_miss i.year#i.grade age


cap egen schid_ma= group(schlcode_ma lea_ma)
cap egen schid_rd= group(schlcode_rd lea_rd)

tempfile tempbuild
save `tempbuild', replace

foreach ss in ma rd {

use `tempbuild', clear

***Focus sample 
qui  areg score_`ss' IL* `student_cov2' numstudents_`ss' , absorb(teach_year_`ss')
predict VA_1yr, d
 cap gen samp=e(sample)
 drop if samp!=1
 cap  egen stu_count =sum(samp), by (teachid_`ss' year)
 capture egen tag_tch_sch_yr=tag(teachid_`ss' year schlcode_`ss' lea_`ss')
 cap egen tchr_schls=sum(tag_tch_sch_yr), by(teachid_`ss' year)

 ****Drop teachers who teach SPED and ELL

 drop if class_mn_disability_`ss'>.5
 drop if class_mn_ENG_LEARN_`ss'>.5


 
  ********************************************************************
 ****Generate teacher school EB VA measures

  capture egen tid=group(teachid_`ss')
  cap  egen tidy=group(year schlcode_`ss' lea_`ss' section_`ss'  teachid_`ss' )
  cap egen sch_by_yr=group(schlcode_`ss' lea_`ss' year)
 cap  gen ncerdc_id=teachid_`ss'
cap gen sy=year
  merge m:1 ncerdc_id sy using "$basedata/ncerdc_experience.dta" 
  drop if _merge==2
  drop _merge 
local student_cov2 i.grade i.ethnic i.female i.gifted i.disability i.MIGRANT i.ENG_LEARN  i.DISADV_persist read_accom math_accom  *_miss i.year#i.grade age

	gen vijt=.
	gen sample=.
 areg score_`ss' IL* `student_cov2' numstudents_`ss', absorb(tidy)
replace sample=1 if e(sample)==1
 predict temp if e(sample)==1, d 
predict  temp1 if e(sample)==1, r
replace vijt=temp+temp1  if e(sample)==1
drop temp1 temp


save "$temp/student_level_`ss'_w_unshrunken_VA_homogeneous", replace	

}


foreach ss in ma { 

use "$temp/student_level_`ss'_w_unshrunken_VA_homogeneous", clear

cap drop lea schlcode


gen expdum = tchr_exp_pay_level
replace expdum = 6 if tchr_exp_pay_level>6 & tchr_exp_pay_level!=.
replace expdum = 7 if tchr_exp_pay_level==.
assert expdum<=7 & floor(expdum)==expdum

ren schid_`ss' schid
ren lea_`ss' lea
ren schlcode_`ss' schlcode
ren teachid_`ss' teachid
ren section_`ss' section
ren sidx_`ss' sidx

preserve



gen numstudentstidy = 1
collapse (mean) exp* sy tchr_exp_pay_level vijt DISADV (sum) numstudentstidy, by(tid sch_by_yr schid tidy)


reghdfe vijt i.expdum [fw=numstudentstidy], absorb(tch_e0_sfe=tid yfe=sy sfe=schid) res(resid_tch_sfe)


***predict only the fitted experience profile
	predict xb,xb
	cap gen VA_tchr_exp_sfe= xb+tch_e0_sfe

	collapse (mean) xb VA_tchr_exp_sfe sfe, by(tid sch_by_yr schid tidy)
	tempfile tempVA
	save `tempVA', replace
restore

sort tid sch_by_yr schid tidy
merge n:1 tid sch_by_yr schid tidy using `tempVA'
assert _m==3
drop _m

* construct class variable
egen jtidx = group(teachid year schlcode lea)
sort jtidx section
gen newsection = jtidx!=jtidx[_n-1] | section!=section[_n-1]
gen secnum = 1 if jtidx!=jtidx[_n-1]
replace secnum = secnum[_n-1]+newsection if secnum==.

gen j = teachid // teacher index
gen c = secnum // class index (numbered 1 to C, reset for each teacher-year)
gen m = 1*(DISADV==0) + 2*(DISADV==1) // student types (numered 1 to M)
gen t = year // year
gen r = vijt - xb - sfe // student-level residual: original residual minus experience effects minus school-year FEs
gen alphaZ = xb // estimated experience effect (from prior step), varies by teacher-year
gen e = tchr_exp_pay_level
gen s = sidx

keep j c m t r alphaZ e s schlcode lea 

qui summ t
global tmin = r(min)
global tmax = r(max)

global nt = $tmax - $tmin


** estimate the variance of the student error, by type

* take the student residual, minus the classroom mean, by type
* square the residual and take the mean across the data (mean residual is 0, so square = variance)

bys j c t: egen mean_resid_ct = mean(r)
bys j c t: egen n_resid_ct = count(r)
gen resid_i_sq = (r-mean_resid_ct)^2
gen resid_i_sq_c = resid_i_sq * n_resid_ct/(n_resid_ct-1)

qui summ resid_i_sq_c
gen sigma2_eps = r(mean)



** collapse the student residuals to the teacher-classroom-type level 
* calculate the mean and the number of observations

collapse (count) n_ct=r (mean) Abar=r alphaZ sigma* e, by(j c t s schlcode lea)

** estimate the covariances and variances necessary for the structural parameters

* cov(\bar{A}_{jmct},\bar{A}_{jmc't}) for m=1,2
* will just keep classrooms 1 and 2. can add more for precision


preserve

gen Abar_c1 = Abar if c==1
gen Abar_c2 = Abar if c==2

collapse (mean) Abar_c1 Abar_c2, by(j /*m*/ t s)

keep if Abar_c1!=. & Abar_c2!=.

corr Abar_c1 Abar_c2, covariance
local sigma2_muj = r(cov_12)

restore

gen sigma2_muj = `sigma2_muj'


* Var(\bar{A}_{jmct})

qui summ Abar
gen VarAbar = r(Var)

gen temp = VarAbar - sigma2_muj - (sigma2_eps / n)

qui summ temp
gen sigma2_theta = r(mean)


drop temp*


* estimate drift parameters, by comparing across years (sometimes within, sometimes across type)
* estimate \sigma_{\mu_1,|t-s|}, \sigma_{\mu_2,|t-s|}, \sigma_{\mu_1 \mu_2,|t-s|}

preserve


gen Abar_wgt = Abar*n_ct // want to calculate mean residual, weighted by number of students

collapse (sum) n_ct Abar_wgt, by(j t)

gen Abar = Abar_wgt / n_ct

xtset j t

local nt = $nt

forv ts=1/`nt' {
	gen Abar_L`ts' = l`ts'.Abar
	
	corr Abar Abar_L`ts', covariance
	local cov_`ts' = r(cov_12)
	
	drop Abar_L`ts'

}
restore

local nt = $nt

forv ts=1/`nt' {
	gen sigma_`ts' = `cov_`ts''
}





** estimate the reduced form parameters


* start by collapsing the data so that types are in columns, not separate rows


collapse (mean) n_ct Abar alphaZ sigma* e, by(j c t s schlcode lea)

assert s!=.
bys j t: egen mins = min(s)
bys j t: egen maxs = max(s)
gen multi_school = mins!=maxs
drop mins maxs

sort j s multi_school
gen teach_school = 1 if j!=j[_n-1] & multi_school!=1
replace teach_school = teach_school[_n-1] + (s!=s[_n-1]) if teach_school==. & multi_school!=1

sort j t s multi_school
gen teach_school_order = 1 if j!=j[_n-1] & multi_school!=1
replace teach_school_order = teach_school_order[_n-1] + (s!=s[_n-1]) if teach_school_order==. & multi_school!=1

save "$temp/VAstructural_homogeneous_`ss'", replace

qui summ t
global tmin_`ss' = r(min)
global tmax_`ss' = r(max)

qui summ teach_school
global smin_`ss' = r(min)
global smax_`ss' = r(max)

qui summ teach_school_order
global sminorder_`ss' = r(min)
global smaxorder_`ss' = r(max)

use "$temp/VAstructural_homogeneous_`ss'", replace

collapse (mean) multi_school teach_school teach_school_order, by(j s t schlcode lea)

save "$temp/VAschoolindices_homogeneous_`ss'", replace

}



cap program drop blp_va
program define blp_va

di "Shrinking VA for Subject `1', VA Type `2', VA Type Detail `3'"

use "$temp/VAstructural_homogeneous_`1'", clear

gen sigma_0 = sigma2_muj

* save experience measures to merge on at end

preserve

if "`2'" == "career" {
	keep if t==`3'
	collapse (mean) alphaZ e sigma*, by(j t)
}
if "`2'" == "loY" {
	keep if t==`3'
	collapse (mean) alphaZ e, by(j t)
}
if "`2'" == "preY" {
	keep if t==`3'
	collapse (mean) alphaZ e, by(j t)
}
if "`2'" == "loS" {
	keep if t==`3'
	collapse (mean) alphaZ e, by(j t teach_school)
}
if "`2'" == "preS" {
	keep if t==`3'
	collapse (mean) alphaZ e, by(j t teach_school_order)
}


tempfile tempjt
save `tempjt', replace
restore

* choose sample

if "`2'" == "loY" {
	keep if t!=`3'
}
if "`2'" == "preY" {
	keep if t<`3'
}
if "`2'" == "loS" {
	gen teach_school_t = teach_school if t==`3'
	bys j: egen teach_school_t_all = min(teach_school_t)
	keep if teach_school!=teach_school_t_all
}
if "`2'" == "preS" {
	gen teach_school_order_t = teach_school_order if t==`3'
	bys j: egen teach_school_order_t_all = min(teach_school_order_t)
	keep if teach_school_order<teach_school_order_t_all
}


* construct some weights based on sample sizes

gen n_ct_sq = n_ct^2

gen Abar_wgt = Abar*n_ct // want to calculate mean residual, weighted by number of students

collapse (sum) n_ct n_ct_sq Abar_wgt (mean) alphaZ sigma* e, by(j t)

gen Abar = Abar_wgt / n_ct

gen w11 = n_ct_sq / n_ct^2

assert w11<=1 if w11!=.

* loop over teacher-years

gen st = t-`3'
gen abs_st = abs(st)
gen drift_st = min(abs_st,`4')

keep if Abar != .

egen teacher_index = group(j)

qui summ teacher_index
local n = r(max)

tempfile temploop tempstore

save `temploop', replace

forv nn=1/`n' {

	use `temploop', clear

	keep if teacher_index==`nn'
	
	local nj = _N
	matrix A = J(`nj',1,.)
	matrix gamma = J(`nj',1,.)
	matrix Gamma = J(`nj',`nj',.)
	forv aa=1/`nj' {
	
		local dd = drift_st[`aa']
	
		matrix A[`aa',1] = Abar[`aa']
		
		matrix gamma[`aa',1] = sigma_`dd'[`aa']
		
		matrix Gamma[`aa',`aa'] = sigma2_muj[`aa'] + sigma2_theta[`aa'] * w11[`aa'] + sigma2_eps[`aa'] / n_ct[`aa']
		
		
		forv bb=1/`nj' {
			
			if `aa' != `bb' {
	
				local drift_diff = min(abs(t[`aa']-t[`bb']),`4')
				matrix Gamma[`aa',`bb'] = sigma_`drift_diff'[`aa']
			}
		
		}
	}

	if det(Gamma)>0 {
	
		cap matrix psi = invsym(Gamma)*gamma
	
		cap matrix VA = psi'*A
	
		gen mu_t_hat = VA[1,1]
	}
	else if det(Gamma)<=0 {
		gen mu_t_hat = .
	}
	
	keep j mu_t_hat
	keep if _n==1
	
	if `nn'>1 {
		append using `tempstore'
	}
	sort j
	save `tempstore', replace
	
}

use `tempstore', clear
sort j

merge 1:n j using `tempjt'
drop if _m==1 | _m==2 // double check this after adding back missings
drop _m

** add back in experience effect

gen mu_jt_hat = mu_t_hat + alphaZ

local extravars = "" 

if "`2'" == "loS" {
	local extravars = "teach_school"
}
if "`2'" == "preS" {
	local extravars = "teach_school_order"
}


keep j t mu_jt_hat mu_t_hat `extravars'
sort j t

ren (mu_jt_hat mu_t_hat) ///
 (mu_jt_hat_`2' mu_t_hat_`2')


save "$temp/blp_va_homogeneous_`1'_`2'_`3'_drift", replace

end

* 1: subject
* 2: sample description: "career" "loY" "loS" "preY" "preS"
* 3: sample specific value
* 4: drift limit

foreach ss in ma { 
	
		local tmin = ${tmin_`ss'}
		local tmax = ${tmax_`ss'}
	
		forv tt=`tmin'/`tmax' {
			
			
			blp_va `ss' loY `tt' 8
			
			if `tt' > `tmin' {
				blp_va `ss' preY `tt' 8
			}
			

		}


}
stop
** put data back together into one file

foreach ss in ma rd {
	
		local tmin = ${tmin_`ss'}
		local tmax = ${tmax_`ss'}

		clear
		forv tt=`tmin'/`tmax' {
			append using "$temp/blp_va_homogeneous_`ss'_loY_`tt'_drift"
		}
		save "$temp/blp_va_homogeneous_`ss'_loY_drift", replace
	
		clear
		forv tt=`tmin'/`tmax' {
			if `tt' > `tmin' {
				append using "$temp/blp_va_homogeneous_`ss'_preY_`tt'_drift"
			}
		}
		save "$temp/blp_va_homogeneous_`ss'_preY_drift", replace

	
}

foreach ss in ma {
	use "$temp/VAschoolindices_homogeneous_`ss'", clear

	sort j t
	merge n:1 j t using "$temp/blp_va_homogeneous_`ss'_loY_drift"
	*assert _m!=2
	drop _m

	sort j t
	merge n:1 j t using "$temp/blp_va_homogeneous_`ss'_preY_drift"
	assert _m!=2
	drop _m
	
	ren (*) =_`ss'
	ren (j_`ss' t_`ss' s_`ss') (j t s)
	
	save "$temp/blp_va_homogeneous_`ss'_drift", replace
}



foreach ss in ma {
	use "$temp/VAstructural_homogeneous_`ss'", clear
	gen Abar_wgt = Abar*n_ct // want to calculate mean residual, weighted by number of students

	collapse (sum) n_ct Abar_wgt (mean) sigma* e, by(j t s schlcode lea)

	gen Abar = Abar_wgt / n_ct
	
	drop *_wgt
	ren (Abar n_ct sigma* e) =_`ss'
	
	tempfile temp`ss'
	save `temp`ss'', replace
}


use "$temp/blp_va_homogeneous_ma_drift"

sort j t s
merge 1:1 j t s using `tempma'
assert _m==3
drop _m


save "$basedata/va_homogeneous_estimates_drift", replace
